// Copyright 2019 The TCMalloc Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Unittest for the TCMalloc implementation.
//
// * The test consists of a set of threads.
// * Each thread maintains a set of allocated objects, with
//   a bound on the total amount of data in the set.
// * Each allocated object's contents are generated by
//   hashing the object pointer, and a generation count
//   in the object.  This allows us to easily check for
//   data corruption.
// * At any given step, the thread can do any of the following:
//     a. Allocate an object
//     b. Increment an object's generation count and update
//        its contents.
//     c. Pass the object to another thread
//     d. Free an object
//   Also, at the end of every step, object(s) are freed to maintain
//   the memory upper-bound.

#define _XOPEN_SOURCE 600
#define _GNU_SOURCE 1
#include <errno.h>
#include <malloc.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <array>
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <limits>
#include <new>
#include <optional>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include "benchmark/benchmark.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/attributes.h"
#include "absl/base/casts.h"
#include "absl/base/macros.h"
#include "absl/container/flat_hash_set.h"
#include "absl/numeric/bits.h"
#include "absl/random/random.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
#include "absl/types/optional.h"
#include "tcmalloc/alloc_at_least.h"
#include "tcmalloc/common.h"
#include "tcmalloc/huge_pages.h"
#include "tcmalloc/internal/config.h"
#include "tcmalloc/internal/declarations.h"
#include "tcmalloc/internal/logging.h"
#include "tcmalloc/internal/parameter_accessors.h"
#include "tcmalloc/malloc_extension.h"
#include "tcmalloc/pages.h"
#include "tcmalloc/parameters.h"
#include "tcmalloc/testing/test_allocator_harness.h"
#include "tcmalloc/testing/testutil.h"
#include "tcmalloc/testing/thread_manager.h"

// Testing parameters
//
// When making aligned allocations, we pick a power of two up to 1 <<
// kLogMaxMemalign.
const int kLogMaxMemalign = 18;

extern "C" void* reallocarray(void* ptr, size_t nmemb, size_t size);

#if !defined(__GLIBC__)
extern "C" int malloc_trim(size_t pad);
#endif

#if !defined(__GLIBC__)
extern "C" int malloc_info(int opt, FILE* fp) noexcept;
#endif

static const int kSizeBits = 8 * sizeof(size_t);
static const size_t kMaxTestSize = ~static_cast<size_t>(0);
static const size_t kMaxSignedSize = ((size_t(1) << (kSizeBits - 1)) - 1);

#ifndef __riscv
// This goes slightly beyond kMaxSize to test both small and page allocations.
static const size_t kMaxTestAllocSize = 384 << 10;
#else
// Execution is very slow on riscv.
static const size_t kMaxTestAllocSize = 4 << 10;
#endif  // #ifndef __riscv

namespace tcmalloc {
extern ABSL_ATTRIBUTE_WEAK bool want_hpaa();
}  // namespace tcmalloc

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  constexpr size_t kGiB = 1024 * 1024 * 1024;
  SetTestResourceLimit(/*limit=*/8 * kGiB);

  return RUN_ALL_TESTS();
}

namespace tcmalloc::tcmalloc_internal {
namespace {

TEST(TcmallocTest, EmptyAllocations) {
  // Check that empty allocation works
  void* p1 = ::operator new(0);
  ASSERT_NE(p1, nullptr);
  void* p2 = ::operator new(0);
  ASSERT_NE(p2, nullptr);
  ASSERT_NE(p1, p2);
  ::operator delete(p1);
  ::operator delete(p2);
}

TEST(TcmallocTest, LargeAllocation) {
  // Check that "lots" of memory can be allocated
  constexpr size_t kMB = 1 << 20;
  ::operator delete(::operator new(100 * kMB));
}

TEST(TcmallocTest, Calloc) {
  // Check calloc() with various arguments

  struct TestCase {
    size_t n;
    size_t s;
    bool ok;
  };

  constexpr TestCase tests[] = {
      {0, 0, true},
      {0, 1, true},
      {1, 1, true},
      {1 << 10, 0, true},
      {1 << 20, 0, true},
      {0, 1 << 10, true},
      {0, 1 << 20, true},
      {1 << 20, 2, true},
      {2, 1 << 20, true},
      {1000, 1000, true},
      {kMaxTestSize, 2, false},
      {2, kMaxTestSize, false},
      {kMaxTestSize, kMaxTestSize, false},
      {kMaxSignedSize, 3, false},
      {3, kMaxSignedSize, false},
      {kMaxSignedSize, kMaxSignedSize, false},
  };

  for (const auto& t : tests) {
    SCOPED_TRACE(absl::StrFormat("calloc(%x, %x)", t.n, t.s));

    void* ptr = calloc(t.n, t.s);
    benchmark::DoNotOptimize(ptr);

    if (!t.ok) {
      EXPECT_EQ(errno, ENOMEM);
    }

    EXPECT_EQ(t.ok, ptr != nullptr);
    if (ptr != nullptr) {
      memset(ptr, 0, t.n * t.s);
      benchmark::DoNotOptimize(ptr);
    }

    // This is harmless if p == nullptr.
    free(ptr);
  }
}

TEST(TcmallocTest, Realloc) {
  // Test that realloc doesn't always reallocate and copy memory.
  constexpr int kLargeSize = (1 << 20) - (1 << 10);
  ASSERT_GT(kLargeSize, tcmalloc_internal::kMaxSize);
  constexpr int start_sizes[] = {100, 1000, 10000, 100000, kLargeSize};
  constexpr int deltas[] = {1,   -2, 4,    -8,      16,
                            -32, 64, -128, 1 << 10, -(2 << 10)};

  // When sampling, we always allocate in units of page-size, which makes
  // reallocs of small sizes do extra work (thus, failing these checks).
  // Since sampling is random, we turn off sampling to make sure that
  // doesn't happen to us here. But very large blocks shouldn't be
  // reallocated even with sampling.
  ScopedNeverSample never_sample;

  for (int s = 0; s < sizeof(start_sizes) / sizeof(*start_sizes); ++s) {
    void* p = malloc(start_sizes[s]);
    // We stash a copy of the pointer p so we can reference it later.  We must
    // work with the return value of p.
    //
    // Even if we successfully determine that realloc's return value is
    // equivalent to its input value, we must use the returned value under
    // penalty of UB.
    const intptr_t orig_ptr = absl::bit_cast<intptr_t>(p);
    benchmark::DoNotOptimize(p);

    ASSERT_NE(p, nullptr);
    // The larger the start-size, the larger the non-reallocing delta.
    for (int d = 0; d < (s + 1) * 2; ++d) {
      p = realloc(p, start_sizes[s] + deltas[d]);
      const intptr_t new_ptr = absl::bit_cast<intptr_t>(p);
      benchmark::DoNotOptimize(p);

      ASSERT_EQ(orig_ptr, new_ptr)
          << ": realloc should not allocate new memory"
          << " (" << start_sizes[s] << " + " << deltas[d] << ")";
    }
    // Test again, but this time reallocing smaller first.
    for (int d = 0; d < s * 2; ++d) {
      p = realloc(p, start_sizes[s] - deltas[d]);
      const intptr_t new_ptr = absl::bit_cast<intptr_t>(p);
      benchmark::DoNotOptimize(p);

      ASSERT_EQ(orig_ptr, new_ptr)
          << ": realloc should not allocate new memory"
          << " (" << start_sizes[s] << " + " << -deltas[d] << ")";
    }
    free(p);
  }
}

TEST(TcmallocTest, ReallocArray) {
  // Test that realloc doesn't always reallocate and copy memory.

  // When sampling, we always allocate in units of page-size, which makes
  // reallocs of small sizes do extra work (thus, failing these checks).  Since
  // sampling is random, we turn off sampling to make sure that doesn't happen
  // to us here.
  ScopedNeverSample never_sample;

  constexpr int start_sizes[] = {100, 1000, 10000, 100000};
  constexpr int deltas[] = {1, -2, 4, -8, 16, -32, 64, -128};

  for (int s = 0; s < sizeof(start_sizes) / sizeof(*start_sizes); ++s) {
    void* p = malloc(start_sizes[s]);
    // We stash a copy of the pointer p so we can reference it later.  We must
    // work with the return value of p.
    //
    // Even if we successfully determine that realloc's return value is
    // equivalent to its input value, we must use the returned value under
    // penalty of UB.
    const intptr_t orig_ptr = absl::bit_cast<intptr_t>(p);
    benchmark::DoNotOptimize(p);

    ASSERT_NE(p, nullptr);
    // The larger the start-size, the larger the non-reallocing delta.
    for (int d = 0; d < (s + 1) * 2; ++d) {
      p = reallocarray(p, start_sizes[s] + deltas[d], 1);
      const intptr_t new_ptr = absl::bit_cast<intptr_t>(p);
      benchmark::DoNotOptimize(p);

      ASSERT_EQ(orig_ptr, new_ptr)
          << ": reallocarray should not allocate new memory"
          << " (" << start_sizes[s] << " + " << deltas[d] << ")";
    }
    // Test again, but this time reallocing smaller first.
    for (int d = 0; d < s * 2; ++d) {
      p = reallocarray(p, start_sizes[s] - deltas[d], 1);
      const intptr_t new_ptr = absl::bit_cast<intptr_t>(p);
      benchmark::DoNotOptimize(p);

      ASSERT_EQ(orig_ptr, new_ptr)
          << ": reallocarray should not allocate new memory"
          << " (" << start_sizes[s] << " + " << -deltas[d] << ")";
    }
    free(p);
  }
}

TEST(TcmallocTest, ReallocArrayOverflow) {
  struct TestCase {
    size_t n;
    size_t s;
  };

  constexpr TestCase tests[] = {
      {kMaxTestSize, 2},
      {2, kMaxTestSize},
      {kMaxTestSize, kMaxTestSize},
      {kMaxSignedSize, 3},
      {3, kMaxSignedSize},
      {kMaxSignedSize, kMaxSignedSize},
  };

  for (const auto& t : tests) {
    SCOPED_TRACE(absl::StrFormat("reallocarray(nullptr, %x, %x)", t.n, t.s));

    void* ptr = reallocarray(nullptr, t.n, t.s);
    benchmark::DoNotOptimize(ptr);
    EXPECT_EQ(errno, ENOMEM);
    EXPECT_EQ(ptr, nullptr);
  }
}

TEST(TcmallocTest, MemalignRealloc) {
  constexpr size_t kDummySize = 42;
  char contents[kDummySize];
  memset(contents, 0x11, kDummySize);

  void* xs[2];
  xs[0] = memalign(16, kDummySize);
  ASSERT_EQ(0, posix_memalign(&xs[1], 16, kDummySize));

  for (void* x : xs) {
    memcpy(x, contents, sizeof(contents));

    ASSERT_NE(nullptr, x);
    void* y = realloc(x, 2 * kDummySize);
    // Reallocating memory obtained for memalign or posix_memalign should work.
    EXPECT_EQ(memcmp(contents, y, sizeof(contents)), 0);
    free(y);
  }
}

TEST(TCMallocTest, Multithreaded) {
  const int kThreads = 10;
  ThreadManager mgr;
  AllocatorHarness harness(kThreads);

  mgr.Start(kThreads, [&](int thread_id) { harness.Run(thread_id); });

  absl::SleepFor(absl::Seconds(3));

  mgr.Stop();
}

TEST(TCMallocTest, HugeThreadCache) {
  // Allocate more than 2^16 objects to trigger an integer overflow of 16-bit
  // counters.
  constexpr int kNum = 70000;
  constexpr size_t kSize = 10;
  std::vector<void*> arr;
  arr.reserve(kNum);

  for (int i = 0; i < kNum; i++) {
    arr.push_back(::operator new(kSize));
  }

  for (int i = 0; i < kNum; i++) {
    ::operator delete(arr[i], kSize);
  }
}

TEST(TCMallocTest, EnormousAllocations) {
  absl::BitGen rand;

  // Check that asking for stuff tiny bit smaller than largest possible
  // size returns NULL.
  for (size_t i = 0; i < 70000; i += absl::Uniform(rand, 1, 20)) {
    size_t size = kMaxTestSize - i;
    // Convince the compiler that size may change, as to suppress
    // optimization/warnings around the size being too large.
    benchmark::DoNotOptimize(size);
    void* p;

    p = malloc(size);
    ASSERT_EQ(nullptr, p);
    EXPECT_EQ(ENOMEM, errno);

    p = ::operator new(size, std::nothrow);
    ASSERT_EQ(nullptr, p);
    p = ::operator new(size, static_cast<std::align_val_t>(16), std::nothrow);
    ASSERT_EQ(nullptr, p);

    size_t alignment = sizeof(p) << absl::Uniform(rand, 1, kLogMaxMemalign);
    ASSERT_NE(0, alignment);
    ASSERT_EQ(0, alignment % sizeof(void*));
    ASSERT_TRUE(absl::has_single_bit(alignment)) << alignment;
    int err = posix_memalign(&p, alignment, size);
    ASSERT_EQ(ENOMEM, err);
  }

  // Asking for memory sizes near signed/unsigned boundary (kMaxSignedSize)
  // might work or not, depending on the amount of virtual memory.
  for (size_t i = 0; i < 100; i++) {
    void* p;
    p = malloc(kMaxSignedSize + i);
    free(p);
    p = malloc(kMaxSignedSize - i);
    free(p);
  }

  for (size_t i = 0; i < 100; i++) {
    void* p;
    p = ::operator new(kMaxSignedSize + i, std::nothrow);
    ::operator delete(p);
    p = ::operator new(kMaxSignedSize - i, std::nothrow);
    ::operator delete(p);
  }

  // Check that ReleaseMemoryToSystem has no visible effect (aka, does not crash
  // the test):
  MallocExtension::ReleaseMemoryToSystem(std::numeric_limits<size_t>::max());
}

static size_t GetUnmappedBytes() {
  std::optional<size_t> bytes =
      MallocExtension::GetNumericProperty("tcmalloc.pageheap_unmapped_bytes");
  TC_CHECK(bytes.has_value());
  return *bytes;
}

TEST(TCMallocTest, ReleaseMemoryToSystem) {
  // Similarly, the hugepage-aware allocator doesn't agree with PH about
  // where release is called for.
  if (&tcmalloc::want_hpaa == nullptr || tcmalloc::want_hpaa()) {
    return;
  }

  static const int MB = 1048576;
  void* a = ::operator new(MB);
  void* b = ::operator new(MB);
  MallocExtension::ReleaseMemoryToSystem(std::numeric_limits<size_t>::max());
  size_t starting_bytes = GetUnmappedBytes();

  // Calling ReleaseMemoryToSystem() a second time shouldn't do anything.
  MallocExtension::ReleaseMemoryToSystem(std::numeric_limits<size_t>::max());
  EXPECT_EQ(starting_bytes, GetUnmappedBytes());

  // ReleaseMemoryToSystem shouldn't do anything either.
  MallocExtension::ReleaseMemoryToSystem(MB);
  EXPECT_EQ(starting_bytes, GetUnmappedBytes());

  ::operator delete(a);

  // The span to release should be 1MB.
  MallocExtension::ReleaseMemoryToSystem(MB / 2);
  EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes());

  // Should do nothing since the previous call released too much.
  MallocExtension::ReleaseMemoryToSystem(MB / 4);
  EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes());

  ::operator delete(b);

  // Use up the extra MB/4 bytes from 'a' and also release 'b'.
  MallocExtension::ReleaseMemoryToSystem(MB / 2);
  EXPECT_EQ(starting_bytes + 2 * MB, GetUnmappedBytes());

  // Should do nothing since the previous call released too much.
  MallocExtension::ReleaseMemoryToSystem(MB / 2);
  EXPECT_EQ(starting_bytes + 2 * MB, GetUnmappedBytes());

  // Nothing else to release.
  MallocExtension::ReleaseMemoryToSystem(std::numeric_limits<size_t>::max());
  EXPECT_EQ(starting_bytes + 2 * MB, GetUnmappedBytes());

  a = ::operator new(MB);
  ::operator delete(a);
  EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes());

  // Releasing less than a page should still trigger a release.
  MallocExtension::ReleaseMemoryToSystem(1);
  EXPECT_EQ(starting_bytes + 2 * MB, GetUnmappedBytes());
}

TEST(TCMallocTest, MallocTrim) { malloc_trim(0); }

TEST(TCMallocTest, NothrowSizedDelete) {
  struct Foo {
    double a;
  };
  // Foo should correspond to a size class used by new, but not by malloc.
  static_assert(sizeof(Foo) == 8, "Unexpected size for Foo");

  static constexpr int kNum = 100;
  Foo* ptrs[kNum];
  for (int i = 0; i < kNum; i++) {
    ptrs[i] = new (std::nothrow) Foo;
  }
  for (int i = 0; i < kNum; i++) {
    delete ptrs[i];
  }
}

TEST(TCMallocTest, NothrowSizedDeleteArray) {
  struct Foo {
    ~Foo() {}
    double a;
  };
  // Foo should correspond to a size class used by new, but not by malloc,
  // for some sizes k, sizeof(size_t) + sizeof(Foo) * k.  (sizeof(size_t) being
  // the size cookie of the implementation.)
  static_assert(sizeof(Foo) == 8, "Unexpected size for Foo");
  // With a non-trivially destructible type, we expect the compiler to insert a
  // size cookie so it can invoke sized delete[].
  static_assert(!std::is_trivially_destructible<Foo>::value,
                "Foo should not be trivially destructable, for sized delete[]");

  static constexpr int kNum = 100;
  Foo* ptrs[kNum];
  for (int i = 0; i < kNum; i++) {
    ptrs[i] = new (std::nothrow) Foo[i % 10];
  }
  for (int i = 0; i < kNum; i++) {
    delete[] ptrs[i];
  }
}

TEST(TCMallocTest, MallocAlignment) {
  static constexpr int kNum = 100;

  for (int lg = 0; lg < 16; lg++) {
    const size_t sizes[3] = {
        static_cast<size_t>((1 << lg) - 1),
        static_cast<size_t>(1 << lg),
        static_cast<size_t>((1 << lg) + 1),
    };
    void* ptrs[kNum * ABSL_ARRAYSIZE(sizes)];
    int i = 0;
    for (size_t size : sizes) {
      for (int j = 0; j < kNum; i++, j++) {
        ptrs[i] = malloc(size);
        uintptr_t p = reinterpret_cast<uintptr_t>(ptrs[i]);
        ASSERT_EQ(0, p % alignof(std::max_align_t)) << size << " " << j;
      }
    }

    for (void* ptr : ptrs) {
      free(ptr);
    }
  }
}

TEST(TCMallocTest, CallocAlignment) {
  static constexpr int kNum = 100;

  for (int lg = 0; lg < 16; lg++) {
    const size_t sizes[3] = {
        static_cast<size_t>((1 << lg) - 1),
        static_cast<size_t>(1 << lg),
        static_cast<size_t>((1 << lg) + 1),
    };
    void* ptrs[kNum * ABSL_ARRAYSIZE(sizes)];
    int i = 0;
    for (size_t size : sizes) {
      for (int j = 0; j < kNum; i++, j++) {
        ptrs[i] = calloc(size, (1 << (j % 5)));
        uintptr_t p = reinterpret_cast<uintptr_t>(ptrs[i]);
        ASSERT_EQ(0, p % alignof(std::max_align_t)) << size << " " << j;
      }
    }

    for (void* ptr : ptrs) {
      free(ptr);
    }
  }
}

TEST(TCMallocTest, ReallocAlignment) {
  static constexpr int kNum = 100;

  for (int lg = 0; lg < 16; lg++) {
    const size_t sizes[3] = {
        static_cast<size_t>((1 << lg) - 1),
        static_cast<size_t>(1 << lg),
        static_cast<size_t>((1 << lg) + 1),
    };
    void* ptrs[kNum * ABSL_ARRAYSIZE(sizes)];
    int i = 0;
    for (size_t size : sizes) {
      for (int j = 0; j < kNum; i++, j++) {
        ptrs[i] = malloc(size);
        uintptr_t p = reinterpret_cast<uintptr_t>(ptrs[i]);
        ASSERT_EQ(0, p % alignof(std::max_align_t)) << size << " " << j;

        const size_t new_size = (1 << (kNum % 16)) + (kNum % 3) - 1;
        void* new_ptr = realloc(ptrs[i], new_size);
        if (new_ptr == nullptr) {
          continue;
        }
        ptrs[i] = new_ptr;

        p = reinterpret_cast<uintptr_t>(new_ptr);
        ASSERT_EQ(0, p % alignof(std::max_align_t))
            << size << " -> " << new_size << " " << j;
      }
    }

    for (void* ptr : ptrs) {
      free(ptr);
    }
  }
}

TEST(TCMallocTest, AlignedNew) {
  absl::BitGen rand;

  struct alloc {
    void* ptr;
    size_t size;
    std::align_val_t alignment;
  };

  std::vector<alloc> allocated;
  for (int i = 1; i < 100; ++i) {
    alloc a;
    a.size = absl::LogUniform(rand, 0, 1 << 20);
    a.alignment = static_cast<std::align_val_t>(1 << absl::Uniform(rand, 0, 6));

    a.ptr = ::operator new(a.size, a.alignment);
    ASSERT_NE(a.ptr, nullptr);
    ASSERT_EQ(0, reinterpret_cast<uintptr_t>(a.ptr) %
                     static_cast<size_t>(a.alignment));
    allocated.emplace_back(a);
  }
  for (const auto& p : allocated) {
    if (absl::Bernoulli(rand, 0.5)) {
      ::operator delete(p.ptr, p.alignment);
    } else {
      ::operator delete(p.ptr, p.size, p.alignment);
    }
  }
}

TEST(TCMallocTest, AlignedNewArray) {
  absl::BitGen rand;

  struct alloc {
    void* ptr;
    size_t size;
    std::align_val_t alignment;
  };

  std::vector<alloc> allocated;
  for (int i = 1; i < 100; ++i) {
    alloc a;
    a.size = absl::LogUniform(rand, 0, 1 << 20);
    a.alignment = static_cast<std::align_val_t>(1 << absl::Uniform(rand, 0, 6));

    a.ptr = ::operator new[](a.size, a.alignment);
    ASSERT_NE(a.ptr, nullptr);
    ASSERT_EQ(0, reinterpret_cast<uintptr_t>(a.ptr) %
                     static_cast<size_t>(a.alignment));
    allocated.emplace_back(a);
  }
  for (const auto& p : allocated) {
    if (absl::Bernoulli(rand, 0.5)) {
      ::operator delete[](p.ptr, p.alignment);
    } else {
      ::operator delete[](p.ptr, p.size, p.alignment);
    }
  }
}

TEST(TCMallocTest, NothrowAlignedNew) {
  absl::BitGen rand;
  for (int i = 1; i < 100; ++i) {
    size_t size = kMaxTestSize - i;
    std::align_val_t alignment =
        static_cast<std::align_val_t>(1 << absl::Uniform(rand, 0, 6));
    // Convince the compiler that size may change, as to suppress
    // optimization/warnings around the size being too large.
    benchmark::DoNotOptimize(size);
    void* p = ::operator new(size, alignment, std::nothrow);
    ASSERT_EQ(p, nullptr);
  }
  for (int i = 1; i < 100; ++i) {
    size_t size = absl::LogUniform(rand, 0, 1 << 20);
    std::align_val_t alignment =
        static_cast<std::align_val_t>(1 << absl::Uniform(rand, 0, 6));
    void* ptr = ::operator new(size, alignment, std::nothrow);
    ASSERT_NE(ptr, nullptr);
    ASSERT_EQ(
        0, reinterpret_cast<uintptr_t>(ptr) % static_cast<size_t>(alignment));
    ::operator delete(ptr, alignment, std::nothrow);
  }
}

TEST(TCMallocTest, NothrowAlignedNewArray) {
  absl::BitGen rand;
  for (int i = 1; i < 100; ++i) {
    size_t size = kMaxTestSize - i;
    std::align_val_t alignment =
        static_cast<std::align_val_t>(1 << absl::Uniform(rand, 0, 6));
    // Convince the compiler that size may change, as to suppress
    // optimization/warnings around the size being too large.
    benchmark::DoNotOptimize(size);
    void* p = ::operator new[](size, alignment, std::nothrow);
    ASSERT_EQ(p, nullptr);
  }
  for (int i = 1; i < 100; ++i) {
    size_t size = absl::LogUniform(rand, 0, 1 << 20);
    std::align_val_t alignment =
        static_cast<std::align_val_t>(1 << absl::Uniform(rand, 0, 6));
    void* ptr = ::operator new[](size, alignment, std::nothrow);
    ASSERT_NE(ptr, nullptr);
    ASSERT_EQ(
        0, reinterpret_cast<uintptr_t>(ptr) % static_cast<size_t>(alignment));
    ::operator delete[](ptr, alignment, std::nothrow);
  }
}

void CheckSizedDelete() {
  absl::BitGen rand;

  std::vector<std::pair<void*, size_t>> allocated;
  for (int i = 1; i < 100; ++i) {
    size_t alloc_size = absl::LogUniform<int32_t>(rand, 0, (1 << 20) - 1);
    void* p1 = ::operator new(alloc_size);
    ASSERT_NE(p1, nullptr);
    allocated.push_back(std::make_pair(p1, alloc_size));
  }
  for (std::vector<std::pair<void*, size_t>>::const_iterator i =
           allocated.begin();
       i != allocated.end(); ++i) {
    ::operator delete(i->first, i->second);
  }
}

TEST(TCMallocTest, SizedDelete) { CheckSizedDelete(); }

TEST(TCMallocTest, SizedDeleteSampled) {
  ScopedAlwaysSample always_sample;
  CheckSizedDelete();
}

// Check sampled allocations return the proper size.
TEST(TCMallocTest, SampleAllocatedSize) {
  ScopedAlwaysSample always_sample;
  for (int i = 0; i < 10; ++i) {
    void* ptr = malloc(64);
    ASSERT_EQ(64, MallocExtension::GetAllocatedSize(ptr));
    free(ptr);
  }
}

// Ensure that nallocx works before main.
struct GlobalNallocx {
  GlobalNallocx() { TC_CHECK_GE(nallocx(99, 0), 99); }
} global_nallocx;

#ifdef __GNUC__
// 101 is the max user priority.
static void check_global_nallocx() __attribute__((constructor(101)));
static void check_global_nallocx() { TC_CHECK_GE(nallocx(99, 0), 99); }
#endif

TEST(TCMallocTest, nallocx) {
  // Guarded allocations may have a smaller allocated size than nallocx
  // predicts.  So we disable guarded allocations.
  ScopedGuardedSamplingInterval gs(-1);

  for (size_t size = 0; size <= kMaxTestAllocSize; size += 11) {
    size_t rounded = nallocx(size, 0);
    ASSERT_GE(rounded, size);
    void* ptr = operator new(size);
    ASSERT_EQ(rounded, MallocExtension::GetAllocatedSize(ptr));
    operator delete(ptr);
  }
}

TEST(TCMallocTest, nallocx_alignment) {
  // Guarded allocations may have a smaller allocated size than nallocx
  // predicts.  So we disable guarded allocations.
  ScopedGuardedSamplingInterval gs(-1);

  for (size_t size = 0; size <= kMaxTestAllocSize; size += 17) {
#ifndef __riscv
    for (auto align : {0, 1, 7, 8, 10}) {
#else
    for (auto align : {5, 7}) {
#endif
      size_t rounded = nallocx(size, MALLOCX_LG_ALIGN(align));
      ASSERT_GE(rounded, size);
      ASSERT_EQ(rounded % (1 << align), 0);
      void* ptr = memalign(1 << align, size);
      ASSERT_EQ(rounded, MallocExtension::GetAllocatedSize(ptr));
      free(ptr);
    }
  }
}

TEST(TCMallocTest, sdallocx) {
  for (size_t size = 0; size <= 4096; size += 7) {
    void* ptr = malloc(size);
    memset(ptr, 0, size);
    benchmark::DoNotOptimize(ptr);
    sdallocx(ptr, size, 0);
  }
}

TEST(TCMallocTest, free_sized) {
  for (size_t size = 0; size <= 4096; size += 7) {
    void* ptr = malloc(size);
    memset(ptr, 0, size);
    benchmark::DoNotOptimize(ptr);
    free_sized(ptr, size);
  }
}

TEST(TCMallocTest, realloc_free_sized) {
  for (size_t size = 0; size <= 4096; size += 7) {
    void* ptr = malloc(size);
    size_t realloc_size = malloc_usable_size(ptr) + 1;
    ptr = realloc(ptr, realloc_size);
    memset(ptr, 0, realloc_size);
    benchmark::DoNotOptimize(ptr);
    free_sized(ptr, realloc_size);
  }
}

TEST(TCMallocTest, b421895944) {
  size_t size = 80;
  void* ptr = malloc(size);
  size_t realloc_size = malloc_usable_size(ptr) + 1;
  ptr = realloc(ptr, realloc_size);
  memset(ptr, 0, realloc_size);
  benchmark::DoNotOptimize(ptr);
  free_sized(ptr, realloc_size);
}

TEST(TCMallocTest, b421895944_2) {
  void* ptr = malloc(164471);
  ASSERT_NE(ptr, nullptr);
  void* newptr = realloc(ptr, 127823);
  ASSERT_NE(newptr, nullptr);
  free_sized(newptr, 127823);
}

TEST(TCMallocTest, b421895944_3) {
  ScopedAlwaysSample always_sample;
  void* ptr = malloc(388072);
  ASSERT_NE(ptr, nullptr);
  void* newptr = realloc(ptr, 388127);
  ASSERT_NE(newptr, nullptr);
  free_sized(newptr, 388127);
}

#ifndef NDEBUG
TEST(TCMallocTest, FreeSizedDeathTest) {
  void* ptr;
  const size_t size = 4096;
  const size_t alignment = 512;
  int err = posix_memalign(&ptr, 512, alignment);
  ASSERT_EQ(err, 0) << alignment << " " << size;
  EXPECT_DEATH(free_sized(ptr, size), "");
}
#endif

TEST(TCMallocTest, free_aligned_aligned) {
  for (size_t size = 7; size <= 4096; size += 7) {
    for (size_t align = 0; align <= 10; align++) {
      const size_t alignment = 1 << align;
      void* ptr = aligned_alloc(alignment, size);
      ASSERT_NE(ptr, nullptr) << alignment << " " << size;
      ASSERT_EQ(reinterpret_cast<uintptr_t>(ptr) & (alignment - 1), 0);
      memset(ptr, 0, size);
      benchmark::DoNotOptimize(ptr);
      free_aligned_sized(ptr, alignment, size);
    }
  }
}

#ifndef NDEBUG
TEST(TCMallocTest, FreeAlignedSizedDeathTest) {
  const size_t size = 128;
  const size_t alignment = 1024;
  void* ptr = malloc(size);
  ASSERT_NE(ptr, nullptr) << alignment << " " << size;
  EXPECT_DEATH(free_aligned_sized(ptr, size, alignment), "");
}
#endif

TEST(TCMallocTest, sdallocx_alignment) {
  for (size_t size = 0; size <= 4096; size += 7) {
    for (size_t align = 3; align <= 10; align++) {
      const size_t alignment = 1 << align;
      void* ptr;
      int err = posix_memalign(&ptr, alignment, size);
      ASSERT_EQ(err, 0) << alignment << " " << size;
      ASSERT_EQ(reinterpret_cast<uintptr_t>(ptr) & (alignment - 1), 0);
      memset(ptr, 0, size);
      benchmark::DoNotOptimize(ptr);
      sdallocx(ptr, size, MALLOCX_LG_ALIGN(align));
    }
  }
}

TEST(TCMallocTest, alloc_at_least) {
  for (size_t size = 0; size <= 4096; size += 7) {
    auto result = alloc_at_least(size);
    ASSERT_GE(result.size, size);
    memset(result.ptr, 0, size);
    if (result.size > size) {
      memset(static_cast<uint8_t*>(result.ptr) + size, 1, result.size - size);
      void* ptr = realloc(result.ptr, result.size + 1);
      ASSERT_NE(ptr, nullptr);
      result.ptr = ptr;
      ASSERT_GE(malloc_usable_size(result.ptr), result.size + 1);
      for (size_t i = size; i < result.size; ++i) {
        ASSERT_EQ(static_cast<uint8_t*>(result.ptr)[i], 1);
      }
      ++result.size;
    }
    benchmark::DoNotOptimize(result);
    ASSERT_GE(malloc_usable_size(result.ptr), result.size);
    free_sized(result.ptr, result.size);
  }
}

TEST(TCMallocTest, aligned_alloc_at_least) {
  for (size_t size = 7; size <= 4096; size += 7) {
    for (size_t align = 0; align <= 10; align++) {
      const size_t alignment = 1 << align;
      auto result = aligned_alloc_at_least(alignment, size);
      ASSERT_NE(result.ptr, nullptr) << alignment << " " << size;
      ASSERT_EQ(reinterpret_cast<uintptr_t>(result.ptr) & (alignment - 1), 0);
      ASSERT_GE(result.size, size);
      memset(result.ptr, 0, result.size);
      benchmark::DoNotOptimize(result);
      free_aligned_sized(result.ptr, alignment, result.size);
    }
  }
}

// Parse out a line like:
// <allocator_name>: xxx bytes allocated
// Return xxx as an int, nullopt if it can't be found
std::optional<int64_t> ParseLowLevelAllocator(absl::string_view allocator_name,
                                              absl::string_view buf) {
  char needlebuf[32];
  int len =
      absl::SNPrintF(needlebuf, sizeof(needlebuf), "\n%s: ", allocator_name);
  TC_CHECK(0 < len && len < sizeof(needlebuf));
  const absl::string_view needle = needlebuf;

  auto pos = buf.find(needle);
  if (pos == absl::string_view::npos) {
    return std::nullopt;
  }
  // skip over the prefix.  Should now look like " <number> bytes allocated".
  pos += needle.size();
  buf.remove_prefix(pos);

  pos = buf.find_first_not_of(' ');
  if (pos != absl::string_view::npos) {
    buf.remove_prefix(pos);
  }

  pos = buf.find(' ');
  if (pos != absl::string_view::npos) {
    buf.remove_suffix(buf.size() - pos);
  }

  int64_t result;
  if (!absl::SimpleAtoi(buf, &result)) {
    return std::nullopt;
  }
  return result;
}

TEST(TCMallocTest, GetStatsReportsLowLevel) {
  std::string stats = MallocExtension::GetStats();
  std::optional<int64_t> low_level_bytes =
      ParseLowLevelAllocator("MmapSysAllocator", stats);
  ASSERT_THAT(low_level_bytes, testing::Ne(std::nullopt));
  EXPECT_GT(*low_level_bytes, 0);
  size_t heap_size =
      *MallocExtension::GetNumericProperty("generic.current_allocated_bytes");

  // sanity check: we must have allocated as many bytes as in the heap
  EXPECT_GE(*low_level_bytes, heap_size);
}

namespace {
template <typename T1, typename T2>
void ExpectSameAddresses(T1 v1, T2 v2) {
  // C++ language requires a constant folding on constant inputs,
  // which may result to returning false for two aliased function,
  // because the aliasing is not known at this compilation unit.
  // Use volatile here to enforce a runtime comparison.
  volatile auto p1 = reinterpret_cast<void (*)()>(v1);
  volatile auto p2 = reinterpret_cast<void (*)()>(v2);
  const bool result = p1 == p2;
  // EXPECT_EQ seems not be able to handle volatiles.
  EXPECT_TRUE(result);
}
}  // end unnamed namespace

TEST(TCMallocTest, TestAliasedFunctions) {
  void* (*operator_new)(size_t) = &::operator new;
  void* (*operator_new_nothrow)(size_t, const std::nothrow_t&) =
      &::operator new;
  void* (*operator_new_array)(size_t) = &::operator new[];
  void* (*operator_new_array_nothrow)(size_t, const std::nothrow_t&) =
      &::operator new[];

  ExpectSameAddresses(operator_new, operator_new_array);
  ExpectSameAddresses(operator_new_nothrow, operator_new_array_nothrow);

  void (*operator_delete)(void*) = &::operator delete;
  void (*operator_delete_nothrow)(void*, const std::nothrow_t&) =
      &::operator delete;
  void (*operator_delete_array)(void*) = &::operator delete[];
  void (*operator_delete_array_nothrow)(void*, const std::nothrow_t&) =
      &::operator delete[];

  ExpectSameAddresses(operator_delete, operator_delete_nothrow);
  ExpectSameAddresses(operator_delete, operator_delete_array);
  ExpectSameAddresses(operator_delete, operator_delete_array_nothrow);
}

// This test is extremely slow on riscv.
#ifndef __riscv
enum class ThrowException { kNo, kYes };

class TcmallocSizedNewTest
    : public ::testing::TestWithParam<
          std::tuple<std::align_val_t, hot_cold_t, ThrowException>> {
 public:
  TcmallocSizedNewTest()
      : align_(std::get<0>(GetParam())),
        hot_cold_(std::get<1>(GetParam())),
        throw_exception_(std::get<2>(GetParam())) {
    const int align_needed = IsOveraligned();
    const int hot_cold_needed = (hot_cold_ != hot_cold_t{128});
    const int nothrow_needed = (throw_exception_ == ThrowException::kNo);
    const int encoding =
        (align_needed << 2) | (hot_cold_needed << 1) | nothrow_needed;
    switch (encoding) {
      case 0b000:
        sro_new_ = __size_returning_new;
        break;
      case 0b001:
        sro_new_ = tcmalloc_size_returning_operator_new_nothrow;
        break;
      case 0b010:
        sro_new_ = [this](size_t size) {
          return __size_returning_new_hot_cold(size, hot_cold_);
        };
        break;
      case 0b011:
        sro_new_ = [this](size_t size) {
          return tcmalloc_size_returning_operator_new_hot_cold_nothrow(
              size, hot_cold_);
        };
        break;
      case 0b100:
        sro_new_ = [this](size_t size) {
          return __size_returning_new_aligned(size, align_);
        };
        break;
      case 0b101:
        sro_new_ = [this](size_t size) {
          return tcmalloc_size_returning_operator_new_aligned_nothrow(size,
                                                                      align_);
        };
        break;
      case 0b110:
        sro_new_ = [this](size_t size) {
          return __size_returning_new_aligned_hot_cold(size, align_, hot_cold_);
        };
        break;
      case 0b111:
        sro_new_ = [this](size_t size) {
          return tcmalloc_size_returning_operator_new_aligned_hot_cold_nothrow(
              size, align_, hot_cold_);
        };
        break;
    }
  }

  sized_ptr_t New(size_t size) const { return sro_new_(size); }

  bool IsNothrow() const { return throw_exception_ == ThrowException::kNo; }

  bool IsOveraligned() const {
    return align_ > std::align_val_t{__STDCPP_DEFAULT_NEW_ALIGNMENT__};
  }

  std::align_val_t GetAlignment() const { return align_; }

  void Delete(sized_ptr_t res) const {
    if (IsOveraligned()) {
      ::operator delete(res.p, align_);
    } else {
      ::operator delete(res.p);
    }
  }

 private:
  std::align_val_t align_;
  hot_cold_t hot_cold_;
  ThrowException throw_exception_;
  // Size returning operator new.
  std::function<sized_ptr_t(size_t)> sro_new_;
};

INSTANTIATE_TEST_SUITE_P(
    AlignedHotColdThrow, TcmallocSizedNewTest,
    testing::Combine(
        testing::Values(1, 8, 32, 64),
        testing::Values(hot_cold_t(0), hot_cold_t{128}, hot_cold_t{255}),
        testing::Values(ThrowException::kNo, ThrowException::kYes)),
    [](const testing::TestParamInfo<TcmallocSizedNewTest::ParamType>& info) {
      std::string name = absl::StrCat(
          "Align", std::get<0>(info.param), "HotCold",
          static_cast<int>(std::get<1>(info.param)),
          std::get<2>(info.param) == ThrowException::kNo ? "Nothrow" : "Throw");
      return name;
    });

TEST_P(TcmallocSizedNewTest, SizedOperatorNewReturnsExtraCapacity) {
  // Turn off guarded sampling, since it will cause us to return the exact size
  // when we do sample and guard.
  //
  // This is not interesting for this test.  Other tests confirm that we get at
  // least requested bytes back.
  ScopedGuardedSamplingInterval gs(-1);

  // For release / no sanitizer builds, tcmalloc does return
  // the next available class size, which we know is always at least
  // properly aligned, so size 3 should always return extra capacity.
  sized_ptr_t res = New(3);
  EXPECT_THAT(res.n, testing::Ge(8));
  Delete(res);
}

TEST_P(TcmallocSizedNewTest, SizedOperatorNew) {
  for (size_t size = 0; size < 16 * 1024; size += 3) {
    sized_ptr_t res = New(size);
    EXPECT_NE(res.p, nullptr);
    EXPECT_GE(res.n, size);
    size_t max_increase = static_cast<size_t>(GetAlignment()) <= 8 ? 2 : 3;
    EXPECT_LE(res.n, std::max(size + 100, max_increase * size));
    benchmark::DoNotOptimize(memset(res.p, 0xBF, res.n));
    Delete(res);
  }
}

TEST_P(TcmallocSizedNewTest, InvalidSizedOperatorNew) {
  constexpr size_t kBadSize = std::numeric_limits<size_t>::max();
  if (IsNothrow()) {
    sized_ptr_t res = New(kBadSize);
    EXPECT_EQ(res.p, nullptr);
    EXPECT_EQ(res.n, 0);
  } else {
    EXPECT_DEATH(New(kBadSize), "");
  }
}

TEST_P(TcmallocSizedNewTest, SizedOperatorNewMatchesMallocExtensionValue) {
  // Set reasonable sampling and guarded sampling probabilities.
  ScopedProfileSamplingInterval s(2000);
  ScopedGuardedSamplingInterval gs(20);

  auto test = [&](size_t size) {
    sized_ptr_t r = New(size);
    ASSERT_EQ(r.n, MallocExtension::GetAllocatedSize(r.p));
    if (IsOveraligned()) {
      ::operator delete(r.p, r.n, GetAlignment());
    } else {
      ::operator delete(r.p, r.n);
    }
  };

  // Traverse clean power 2 / common size class / page sizes
  for (size_t size = 32; size <= 2 * 1024 * 1024; size *= 2) {
    test(size);
  }

  // Traverse randomized sizes
  constexpr size_t kOddIncrement = 117;
  for (size_t size = 32; size <= kMaxTestAllocSize; size += kOddIncrement) {
    test(size);
  }
}
#endif  // #ifndef __riscv

TEST(SizedDeleteTest, SizedOperatorDelete) {
#ifndef __riscv
  const size_t kMaxSize = 64 * 1024;
#else
  const size_t kMaxSize = 1024;
#endif  // #ifndef __riscv
  enum DeleteSize { kSize, kCapacity, kHalfway };
  for (size_t size = 0; size < kMaxSize; ++size) {
    for (auto delete_size : {kSize, kCapacity, kHalfway}) {
      sized_ptr_t res = __size_returning_new(size);
      switch (delete_size) {
        case kSize:
          ::operator delete(res.p, size);
          break;
        case kCapacity:
          ::operator delete(res.p, res.n);
          break;
        case kHalfway:
          ::operator delete(res.p, (size + res.n) / 2);
          break;
      }
    }
  }
}

// Helper to test pointerless & pointer-containing allocation partitioning with
// the alloc-token instrumentation as we "ignore" the cold hints for
// pointer-containing allocations provided heap partitioning is enabled.
template <typename T>
class HotColdTest : public ::testing::Test {
 public:
};
using PointerlessContainingTypes = ::testing::Types<char*>;
TYPED_TEST_SUITE(HotColdTest, PointerlessContainingTypes);

template <typename T>
static bool IsHot(uint8_t label,
                  tcmalloc::hot_cold_t threshold = kDefaultMinHotAccessHint) {
  return static_cast<tcmalloc::hot_cold_t>(label) >= threshold;
}

TYPED_TEST(HotColdTest, HotColdNew) {
  const bool expectColdTags = tcmalloc_internal::ColdFeatureActive();

  absl::flat_hash_set<uintptr_t> hot;
  absl::flat_hash_set<uintptr_t> cold;

  absl::BitGen rng;

  // Allocate some hot objects
  struct SizedPtr {
    void* ptr;
    size_t size;
  };
  constexpr size_t kSmall = 2 << 10;
  constexpr size_t kLarge = 1 << 20;

  std::vector<SizedPtr> ptrs;
  ptrs.reserve(1000);
  for (int i = 0; i < 1000; i++) {
    size_t size = absl::LogUniform<size_t>(rng, kSmall, kLarge);
    uint8_t label = absl::Uniform<uint8_t>(rng, 0, 127);
    label |= uint8_t{128};

    void* ptr = ::operator new(sizeof(TypeParam) * 0 + size,
                               static_cast<tcmalloc::hot_cold_t>(label));

    ptrs.emplace_back(SizedPtr{ptr, size});

    EXPECT_NE(GetMemoryTag(ptr), MemoryTag::kCold) << ptr;
  }

  // Delete
  for (SizedPtr s : ptrs) {
    if (expectColdTags && IsNormalMemory(s.ptr)) {
      EXPECT_TRUE(hot.insert(reinterpret_cast<uintptr_t>(s.ptr)).second);
    }

    if (absl::Bernoulli(rng, 0.2)) {
      ::operator delete(s.ptr);
    } else {
      sized_delete(s.ptr, s.size);
    }
  }

  // Allocate some cold objects
  ptrs.clear();
  for (int i = 0; i < 1000; i++) {
    size_t size = absl::LogUniform<size_t>(rng, kSmall, kLarge);
    uint8_t label = absl::Uniform<uint8_t>(rng, 0, 127);
    label &= ~uint8_t{128};

    void* ptr = ::operator new(sizeof(TypeParam) * 0 + size,
                               static_cast<tcmalloc::hot_cold_t>(label));
    ptrs.emplace_back(SizedPtr{ptr, size});
  }

  for (SizedPtr s : ptrs) {
    if (expectColdTags && GetMemoryTag(s.ptr) == MemoryTag::kCold) {
      EXPECT_TRUE(cold.insert(reinterpret_cast<uintptr_t>(s.ptr)).second);
    }

    if (absl::Bernoulli(rng, 0.2)) {
      ::operator delete(s.ptr);
    } else {
      sized_delete(s.ptr, s.size);
    }
  }

  if (!expectColdTags) {
    return;
  }

  for (uintptr_t h : hot) {
    EXPECT_THAT(cold, testing::Not(testing::Contains(h)))
        << reinterpret_cast<void*>(h);
  }
}

hot_cold_t MinHotAccessHint() {
  return static_cast<tcmalloc::hot_cold_t>(
      TCMalloc_Internal_GetMinHotAccessHint());
}

TYPED_TEST(HotColdTest, NothrowHotColdNew) {
  const bool expectColdTags = tcmalloc_internal::ColdFeatureActive();
  if (!expectColdTags) {
    GTEST_SKIP() << "Cold allocations not enabled";
  }
  constexpr size_t kSmall = 128 << 10;
  constexpr size_t kLarge = 1 << 20;

  absl::BitGen rng;

  // Allocate some objects
  struct SizedPtr {
    void* ptr;
    size_t size;
  };

  std::vector<SizedPtr> ptrs;
  ptrs.reserve(1000);
  for (int i = 0; i < 1000; i++) {
    const size_t size = absl::LogUniform<size_t>(rng, kSmall, kLarge);
    const uint8_t label = absl::Uniform<uint8_t>(rng, 0, 255);

    void* ptr = ::operator new(sizeof(TypeParam) * 0 + size, std::nothrow,
                               static_cast<tcmalloc::hot_cold_t>(label));

    ptrs.emplace_back(SizedPtr{ptr, size});

    if (IsHot<TypeParam>(label, MinHotAccessHint())) {
      EXPECT_NE(GetMemoryTag(ptr), MemoryTag::kCold);
    } else {
      EXPECT_TRUE(!IsNormalMemory(ptr)) << size << " " << label;
    }
  }

  for (SizedPtr s : ptrs) {
    if (absl::Bernoulli(rng, 0.2)) {
      ::operator delete(s.ptr);
    } else {
      sized_delete(s.ptr, s.size);
    }
  }
}

TYPED_TEST(HotColdTest, AlignedNothrowHotColdNew) {
  const bool expectColdTags = tcmalloc_internal::ColdFeatureActive();
  if (!expectColdTags) {
    GTEST_SKIP() << "Cold allocations not enabled";
  }
  constexpr size_t kSmall = 128 << 10;
  constexpr size_t kLarge = 1 << 20;

  absl::BitGen rng;

  // Allocate some objects
  struct SizedPtr {
    void* ptr;
    size_t size;
    std::align_val_t alignment;
  };

  std::vector<SizedPtr> ptrs;
  ptrs.reserve(1000);
  for (int i = 0; i < 1000; i++) {
    const size_t size = absl::LogUniform<size_t>(rng, kSmall, kLarge);
    const std::align_val_t alignment =
        static_cast<std::align_val_t>(1 << absl::Uniform(rng, 0, 6));
    const uint8_t label = absl::Uniform<uint8_t>(rng, 0, 255);

    void* ptr =
        ::operator new(sizeof(TypeParam) * 0 + size, alignment, std::nothrow,
                       static_cast<tcmalloc::hot_cold_t>(label));

    ptrs.emplace_back(SizedPtr{ptr, size, alignment});

    if (IsHot<TypeParam>(label, MinHotAccessHint())) {
      EXPECT_NE(GetMemoryTag(ptr), MemoryTag::kCold);
    } else {
      EXPECT_TRUE(!IsNormalMemory(ptr)) << size << " " << label;
    }
  }

  for (auto p : ptrs) {
    if (absl::Bernoulli(rng, 0.5)) {
      ::operator delete(p.ptr, p.alignment);
    } else {
      ::operator delete(p.ptr, p.size, p.alignment);
    }
  }
}

TYPED_TEST(HotColdTest, ArrayNothrowHotColdNew) {
  const bool expectColdTags = tcmalloc_internal::ColdFeatureActive();
  if (!expectColdTags) {
    GTEST_SKIP() << "Cold allocations not enabled";
  }
  constexpr size_t kSmall = 128 << 10;
  constexpr size_t kLarge = 1 << 20;

  absl::BitGen rng;

  // Allocate some objects
  struct SizedPtr {
    void* ptr;
    size_t size;
  };

  std::vector<SizedPtr> ptrs;
  ptrs.reserve(1000);
  for (int i = 0; i < 1000; i++) {
    const size_t size = absl::LogUniform<size_t>(rng, kSmall, kLarge);
    const uint8_t label = absl::Uniform<uint8_t>(rng, 0, 255);

    void* ptr = ::operator new[](sizeof(TypeParam) * 0 + size, std::nothrow,
                                 static_cast<tcmalloc::hot_cold_t>(label));

    ptrs.emplace_back(SizedPtr{ptr, size});

    if (IsHot<TypeParam>(label, MinHotAccessHint())) {
      EXPECT_NE(GetMemoryTag(ptr), MemoryTag::kCold);
    } else {
      EXPECT_TRUE(!IsNormalMemory(ptr)) << size << " " << label;
    }
  }

  for (SizedPtr s : ptrs) {
    if (absl::Bernoulli(rng, 0.2)) {
      ::operator delete(s.ptr);
    } else {
      sized_delete(s.ptr, s.size);
    }
  }
}

TYPED_TEST(HotColdTest, ArrayAlignedNothrowHotColdNew) {
  const bool expectColdTags = tcmalloc_internal::ColdFeatureActive();
  if (!expectColdTags) {
    GTEST_SKIP() << "Cold allocations not enabled";
  }
  constexpr size_t kSmall = 128 << 10;
  constexpr size_t kLarge = 1 << 20;

  absl::BitGen rng;

  // Allocate some objects
  struct SizedPtr {
    void* ptr;
    size_t size;
    std::align_val_t alignment;
  };

  std::vector<SizedPtr> ptrs;
  ptrs.reserve(1000);
  for (int i = 0; i < 1000; i++) {
    const size_t size = absl::LogUniform<size_t>(rng, kSmall, kLarge);
    const std::align_val_t alignment =
        static_cast<std::align_val_t>(1 << absl::Uniform(rng, 0, 6));
    const uint8_t label = absl::Uniform<uint8_t>(rng, 0, 255);

    void* ptr =
        ::operator new[](sizeof(TypeParam) * 0 + size, alignment, std::nothrow,
                         static_cast<tcmalloc::hot_cold_t>(label));

    ptrs.emplace_back(SizedPtr{ptr, size, alignment});

    if (IsHot<TypeParam>(label, MinHotAccessHint())) {
      EXPECT_NE(GetMemoryTag(ptr), MemoryTag::kCold);
    } else {
      EXPECT_TRUE(!IsNormalMemory(ptr)) << size << " " << label;
    }
  }

  for (auto p : ptrs) {
    if (absl::Bernoulli(rng, 0.5)) {
      ::operator delete[](p.ptr, p.alignment);
    } else {
      sized_array_aligned_delete(p.ptr, p.size, p.alignment);
    }
  }
}

TYPED_TEST(HotColdTest, SizeReturningHotColdNew) {
  const bool expectColdTags = tcmalloc_internal::ColdFeatureActive();
  if (!expectColdTags) {
    GTEST_SKIP() << "Cold allocations not enabled";
  }
  constexpr size_t kSmall = 128 << 10;
  constexpr size_t kLarge = 1 << 20;

  absl::BitGen rng;

  // Allocate some objects
  struct SizedPtr {
    void* ptr;
    size_t requested;
    size_t actual;
  };

  std::vector<SizedPtr> ptrs;
  ptrs.reserve(1000);
  for (int i = 0; i < 1000; i++) {
    const size_t requested = absl::LogUniform<size_t>(rng, kSmall, kLarge);
    const uint8_t label = absl::Uniform<uint8_t>(rng, 0, 255);

    auto [ptr, actual] = __size_returning_new_hot_cold(
        sizeof(TypeParam) * 0 + requested, static_cast<hot_cold_t>(label));
    ASSERT_GE(actual, requested);

    if (IsHot<TypeParam>(label, MinHotAccessHint())) {
      EXPECT_NE(GetMemoryTag(ptr), MemoryTag::kCold);
    } else {
      EXPECT_TRUE(!IsNormalMemory(ptr)) << requested << " " << label;
    }

    std::optional<size_t> allocated_size =
        MallocExtension::GetAllocatedSize(ptr);
    ASSERT_THAT(allocated_size, testing::Ne(std::nullopt));
    EXPECT_EQ(actual, *allocated_size);

    ptrs.emplace_back(SizedPtr{ptr, requested, actual});
  }

  for (auto s : ptrs) {
    const double coin = absl::Uniform(rng, 0., 1.);

    if (coin < 0.2) {
      ::operator delete(s.ptr);
    } else if (coin < 0.4) {
      // Exact size.
      sized_delete(s.ptr, s.actual);
    } else if (coin < 0.6) {
      sized_delete(s.ptr, s.requested);
    } else {
      sized_delete(s.ptr, absl::Uniform(rng, s.requested, s.actual));
    }
  }
}

// Test that setting the min_hot_access_hint parameter has the expected effect
// on treatment of the allocated data as cold.
TYPED_TEST(HotColdTest, HotColdNewMinHotFlag) {
  const bool expectColdTags = tcmalloc_internal::ColdFeatureActive();
  if (!expectColdTags) {
    GTEST_SKIP() << "Cold allocations not enabled";
  }
  // Test using a non-default threshold.
  constexpr tcmalloc::hot_cold_t kNonDefaultMinHotAccessHint =
      static_cast<tcmalloc::hot_cold_t>(10);
  Parameters::set_min_hot_access_hint(kNonDefaultMinHotAccessHint);

  constexpr size_t kSmall = 128 << 10;
  constexpr size_t kLarge = 1 << 20;

  absl::BitGen rng;

  // Allocate some objects
  struct SizedPtr {
    void* ptr;
    size_t size;
  };

  std::vector<SizedPtr> ptrs;
  ptrs.reserve(1000);
  for (int i = 0; i < 1000; i++) {
    const size_t size = absl::LogUniform<size_t>(rng, kSmall, kLarge);
    const uint8_t label = absl::Uniform<uint8_t>(rng, 0, 255);

    void* ptr = ::operator new(sizeof(TypeParam) * 0 + size,
                               static_cast<tcmalloc::hot_cold_t>(label));

    ptrs.emplace_back(SizedPtr{ptr, size});

    // The hotness threshold should have been set to kNonDefaultMinHotAccessHint
    // above via SetFlag.
    if (IsHot<TypeParam>(label, /*threshold=*/kNonDefaultMinHotAccessHint)) {
      EXPECT_NE(GetMemoryTag(ptr), MemoryTag::kCold);
    } else {
      EXPECT_TRUE(!IsNormalMemory(ptr)) << size << " " << label;
    }
  }

  for (SizedPtr s : ptrs) {
    if (absl::Bernoulli(rng, 0.2)) {
      ::operator delete(s.ptr);
    } else {
      sized_delete(s.ptr, s.size);
    }
  }

  // Reset parameter to default.
  Parameters::set_min_hot_access_hint(kDefaultMinHotAccessHint);
}

TYPED_TEST(HotColdTest, SampleHasRuntimeHint) {
  const bool expectColdTags = tcmalloc_internal::ColdFeatureActive();
  if (!expectColdTags) {
    GTEST_SKIP() << "Cold allocations not enabled";
  }

  constexpr size_t kSmall = 128 << 10;
  constexpr size_t kLarge = 1 << 20;

  std::array<void*, 6> ptrs;
  ScopedAlwaysSample always_sample;
  auto token = MallocExtension::StartAllocationProfiling();
  {
    ptrs[0] = operator new(sizeof(TypeParam) * 0 + kLarge,
                           static_cast<hot_cold_t>(1));
    ptrs[1] = operator new(sizeof(TypeParam) * 0 + kSmall,
                           static_cast<hot_cold_t>(2));
    ptrs[2] = operator new(sizeof(TypeParam) * 0 + kLarge, std::nothrow,
                           static_cast<hot_cold_t>(3));
    ptrs[3] = operator new(sizeof(TypeParam) * 0 + kSmall, std::nothrow,
                           static_cast<hot_cold_t>(4));
    ptrs[4] = operator new(sizeof(TypeParam) * 0 + kLarge, std::align_val_t{8},
                           std::nothrow, static_cast<hot_cold_t>(5));
    ptrs[5] = operator new(sizeof(TypeParam) * 0 + kSmall, std::align_val_t{8},
                           std::nothrow, static_cast<hot_cold_t>(6));
  }
  auto profile = std::move(token).Stop();

  ::operator delete(std::exchange(ptrs[4], nullptr), std::align_val_t{8});
  ::operator delete(std::exchange(ptrs[5], nullptr), std::align_val_t{8});

  for (auto* ptr : ptrs) {
    ::operator delete(ptr);
  }

  std::vector<hot_cold_t> hints;
  profile.Iterate([&](const Profile::Sample& sample) {
    // Filter out allocations that can't be ours.
    if (sample.requested_size != kSmall && sample.requested_size != kLarge) {
      return;
    }

    hints.push_back(sample.access_hint);
  });

  EXPECT_THAT(hints, testing::IsSupersetOf({
                         static_cast<hot_cold_t>(1),
                         static_cast<hot_cold_t>(2),
                         static_cast<hot_cold_t>(3),
                         static_cast<hot_cold_t>(4),
                         static_cast<hot_cold_t>(5),
                         static_cast<hot_cold_t>(6),
                     }));
}

// Test that when we use size-returning new, we can pass any of the sizes
// between the requested size and the allocated size to sized-delete.
// We follow
// https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p0901r9.html#sizeddelete.
TEST(MallocExtension, SizeReturningNewAndSizedDelete) {
#ifndef __cpp_sized_deallocation
  GTEST_SKIP() << "No sized deallocation.";
#else
  // Turn off guarded sampling, since it will cause us to return the exact size
  // when we do sample and guard.
  //
  // This is not interesting for testing operator delete with a different size.
  ScopedGuardedSamplingInterval gs(-1);

  for (int i = 0; i < 100; ++i) {
    tcmalloc::sized_ptr_t sized_ptr = __size_returning_new(i);
    ::operator delete(sized_ptr.p, sized_ptr.n);
    for (int j = i, end = sized_ptr.n; j < end; ++j) {
      sized_ptr = __size_returning_new(i);
      EXPECT_EQ(end, sized_ptr.n) << i << "," << j;
      ::operator delete(sized_ptr.p, j);
    }
  }
#endif
}

TEST(TCMalloc, malloc_info) {
  char* buf = nullptr;
  size_t size = 0;
  FILE* fp = open_memstream(&buf, &size);
  ASSERT_NE(fp, nullptr);
  EXPECT_EQ(malloc_info(0, fp), 0);
  EXPECT_EQ(fclose(fp), 0);
  ASSERT_NE(buf, nullptr);
  EXPECT_EQ(absl::string_view(buf, size), "<malloc></malloc>\n");
  free(buf);
}

TEST(Check, CustomTypes) {
  Length len1(1), len2(2);
  TC_CHECK_NE(len1, len2);
  EXPECT_DEATH(
      TC_CHECK_EQ(len1, len2),
      absl::StrFormat("len1 == len2 \\(%u == %u\\)", kPageSize, 2 * kPageSize));

  HugeLength hlen1(1.0), hlen2(2.0);
  TC_CHECK_NE(hlen1, hlen2);
  EXPECT_DEATH(TC_CHECK_EQ(hlen1, hlen2),
               absl::StrFormat("hlen1 == hlen2 \\(%u == %u\\)", kHugePageSize,
                               2 * kHugePageSize));

  PageId page1{1}, page2{2};
  TC_CHECK_NE(page1, page2);
  EXPECT_DEATH(TC_CHECK_EQ(page1, page2),
               absl::StrFormat("page1 == page2 \\(0x%zx == 0x%zx\\)", kPageSize,
                               2 * kPageSize));

  HugePage hpage1{1}, hpage2{2};
  TC_CHECK_NE(hpage1, hpage2);
  EXPECT_DEATH(TC_CHECK_EQ(hpage1, hpage2),
               absl::StrFormat("hpage1 == hpage2 \\(0x%zx == 0x%zx\\)",
                               kHugePageSize, 2 * kHugePageSize));

  absl::Duration dur1(absl::Seconds(1));
  absl::Duration dur2(absl::Seconds(2));
  TC_CHECK_NE(dur1, dur2);
  EXPECT_DEATH(TC_CHECK_EQ(dur1, dur2), "dur1 == dur2 \\(1s == 2s\\)");
}

TEST(TCMalloc, MallocUsableSizeNullptr) {
  EXPECT_EQ(malloc_usable_size(nullptr), 0);
}

}  // namespace
}  // namespace tcmalloc::tcmalloc_internal
